####################################
# Title - Event-Based Framing of Democracy in American News Media
# Date - May 28th, 2024
# Goal - Juncture analysis
####################################

rm(list=ls())

# Library
library(quanteda)
library(dplyr)
library(text2vec)
library(conText)
library(ggplot2)
library(lubridate)
library(tidyr)
library(PerformanceAnalytics) #correlation plot

# Dataset
data<-readRDS("data/nyt_cleaned_articles.rds")
glove_vectors <- readRDS("data/glove.rds") # Embedding model is already trained by stanford researchers
transform_vectors<-readRDS("data/khodakA.rds")
result_dem_mean_month<-readRDS("data/dictionary_demo2_cosine_mean_month.rds")

#time interval
time_intervals <- c(1,3,6,9,12)
#########################
#
# 1. Preprocessing
#
#########################

data$pub_month_factor<-as.factor(data$pub_month)
data$pub_year_factor<-as.factor(data$pub_year)
data$pub_year<-as.numeric(data$pub_year)
data$global<-1

text_corpus <- corpus(data,
                      docid_field = "docid",
                      text_field = "clean_text",
                      meta = T)

# tokenize corpus removing unnecessary (i.e. semantically uninformative) elements
toks <- tokens(text_corpus, remove_punct=T, remove_symbols=T, remove_numbers=F, remove_url = T, remove_separators=T)

# clean out stopwords and words with 2 or fewer characters
toks_nostop <- tokens_select(toks, pattern = stopwords("en"), selection = "remove", min_nchar=3)

# only use features that appear at least 5 times in the corpus
feats <- dfm(toks_nostop, tolower=T, verbose = FALSE) %>% dfm_trim(min_termfreq = 5) %>% featnames()

# check spelling. toupper avoids names being considered misspelled
if (requireNamespace("hunspell", quietly = TRUE)) {
  library(hunspell) # spell check library
  spellcheck <-  hunspell_check(toupper(feats), dict = hunspell::dictionary("en_US"))
  feats <- feats[spellcheck]
}

# leave the pads so that non-adjacent words will not become adjacent
toks_nostop_feats <- tokens_select(toks_nostop, feats, padding = TRUE)


# build a tokenized corpus of contexts surrounding the target term "democracy"
demo_pattern <- c("democracy", "Democracy", "DEMOCRACY",
                  "democracies", "Democracies", "DEMOCRACIES",
                  "democracy's", "Democracy's", "DEMOCRACY's",
                  "democratic")
demo_toks <- tokens_context(x = toks_nostop_feats, pattern = demo_pattern, window = 6L,
                            valuetype = "fixed", case_insensitive = FALSE, hard_cut = FALSE, rm_keyword = FALSE,
                            verbose = TRUE) #No regex which include "undemocracy

rm(text_corpus, toks, toks_nostop, feats)

# build document-feature matrix
demo_dfm <- dfm(demo_toks)

# build a document-embedding-matrix
demo_dem <- dem(x = demo_dfm, 
                pre_trained = glove_vectors, # pre-trained glovel model with 300-dim 
                transform = TRUE, 
                transform_matrix = transform_vectors, 
                verbose = TRUE)

##############################
#
# 2. Cosine similarity for each dictionary
#
##############################

#define dictionary
elec_dict <- c("multiparty", "one-vote", "enfranchisement", "suffrage", "voter", "voters", "election", "voting", "vote", "votes", "ballot")
lib_dict <-c("pluralism", "freedom", "freedoms", "liberty", "liberties", "rights", "individuality", 
             "constitutional", "constitutionalism", "constitutions")
parti_dict<-c("activism", "grassroots", "grass-roots", "movement", "demonstrations", "participation", 
              "rallies", "protest", "protests", "plebiscite")
delib_dict<-c("deliberative", "consensus-building", "consensus", "dialogue", "thoughtful", "deliberation")
egal_dict <-c("egalitarian","egalitarianism", "equality", "unequal","inequality","inequalities", "entitlement","welfare",
              "disparities", "disparity", "equal")
autho_dict<-c("meritocratic","elites","elitist","centralized", "top-down", "charisma")

all_dict <- c(elec_dict, lib_dict, parti_dict, delib_dict, egal_dict, autho_dict)

##################################
#
# 3. Define function
#
##################################
set_juncture <- function(toks, juncture_name, date1, date2, date3) {
  docvars(toks)$target_value <- ifelse(
    docvars(toks)$pub_date < as.Date(date1) & docvars(toks)$pub_date >= as.Date(date2), "Pre-juncture",
    ifelse(docvars(toks)$pub_date >= as.Date(date1) & docvars(toks)$pub_date <= as.Date(date3), "Pro-juncture", "out_window")
  )
  docvars(toks)<-docvars(toks) %>% rename(!!juncture_name := target_value)
  return(toks)
}
cal_cos <-function(toks, juncture_name, feature_dict){
  cos_juncture<-get_cos_sim(x = toks,
                            groups = docvars(toks, juncture_name),
                            features = feature_dict,
                            pre_trained = glove_vectors,
                            transform = TRUE,
                            transform_matrix = transform_vectors,
                            bootstrap = TRUE,
                            num_bootstraps = 100,
                            confidence_level = 0.95,
                            stem = FALSE,
                            as_list = FALSE)
  return(cos_juncture)
}

#########################
#
# Critical juncture - Homeland Security Act of 2002
#
#########################
# Call the function for each tokenized corpus
  juncture <- "junc_hsa"
  regression <-data.frame(coef = NA,
                          lower.ci = NA,
                          upper.ci = NA,
                          length   = NA,
                          model    = NA)
  
  t_test <- data.frame(lower.ci = NA,
                       upper.ci = NA,
                       length   = NA,
                       model    = NA)
  
  for (i in time_intervals){
    start_date = as.Date("2001-09-11")
    start_month = start_date %m-% months(i)
    end_month   = start_date %m+% months(i)
    
    #set docvars
    demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
    
    # Calculate cosine similirarty
    set.seed(2021L)
    juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
    juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
      feature %in% elec_dict ~ "Electoral Democracy",
      feature %in% lib_dict ~  "Liberal Democracy",
      feature %in% parti_dict ~ "Participatory Democracy",
      feature %in% delib_dict ~ "Deliberative Democracy",
      feature %in% egal_dict ~ "Egalitarian Democracy",
      feature %in% autho_dict ~ "Authoritarian Democracy"
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
      demo == "Electoral Democracy" ~ 1,
      demo == "Liberal Democracy" ~ 2,
      demo == "Participatory Democracy" ~ 3,
      demo == "Deliberative Democracy" ~ 4,
      demo == "Egalitarian Democracy" ~ 5,
      demo == "Authoritarian Democracy" ~ 6
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
    
    juncture_dem_cos_juncture %>% 
      ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
      geom_point(size = 1.5)+
      geom_errorbarh(height = 0.1) +
      xlab("Cosine similarity")+
      ylab("Key words")+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
      theme_bw()
    
    ggsave(paste0("fig/juncture/hsa/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #run the regression
    juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
    juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
      target < start_date ~ 0,
      TRUE ~ 1
    ))
    
    #visualize
    juncture_dem_mean_month %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence))+
      geom_line(aes(x = target, y = prevalence))+
      geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
      geom_vline(xintercept = start_date, color ="red")+
      xlab("") +
      ylab("") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    ggsave(paste0("fig/juncture/hsa/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
   
    #regression
    juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
    
    for (k in 3:8) {
      response_var <- names(juncture_dem_mean_month_wide)[k]
      formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
      model <- lm(formula, data = juncture_dem_mean_month_wide)
      
      out <- data.frame(
        coef = coef(model)["juncture"],
        lower.ci = confint(model, 'juncture', level = 0.95)[1],
        upper.ci = confint(model, 'juncture', level = 0.95)[2],
        length = i,
        model = response_var)
      
      regression <- rbind(regression, out)
    }

    #t-test
    if(i==1){
      print("i is 1")
    } else {
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- t.test(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          lower.ci = model$conf.int[1],
          upper.ci = model$conf.int[2],
          length = i,
          model = response_var)
        
        t_test <- rbind(t_test, out)
        }
    }
    docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
  } 
 
  window_reg <- regression[-1,]
  window_reg %>%
    ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
  window_t <- t_test[-1,]
  window_t %>%
    ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)

  #Find real examples
    juncture <- "junc_hsa"
    time_intervals = 6
    start_date = as.Date("2001-09-11")
    start_month = start_date %m-% months(time_intervals)
    end_month   = start_date %m+% months(time_intervals)
    
    #set docvars
    demo_juncture_toks <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
    # build document-feature matrix
    demo_juncture_dfm <- dfm(demo_juncture_toks)
    
    # build a document-embedding-matrix
    demo_juncture_dem <- dem(x = demo_juncture_dfm, 
                             pre_trained = glove_vectors, # pre-trained glovel model with 300-dim 
                             transform = TRUE, 
                             transform_matrix = transform_vectors, 
                             verbose = TRUE)
    
    # to get group-specific embeddings, average within party
    demo_juncture_wv <- dem_group(demo_juncture_dem, groups = demo_juncture_dem@docvars$junc_hsa)
    dim(demo_juncture_wv)
    
    #pre-juncture contexts
    pre_list_ncs<-ncs(x = demo_juncture_wv["Pre-juncture",], 
                      contexts_dem = demo_juncture_dem[demo_juncture_dem@docvars$junc_hsa == "Pre-juncture",],
                      contexts = demo_juncture_toks, N = 179107, as_list = FALSE)
    pre_text_ncs<-ncs(x = demo_juncture_wv["Pre-juncture",], 
                      contexts_dem = demo_juncture_dem[demo_juncture_dem@docvars$junc_hsa == "Pre-juncture",],
                      contexts = NULL, N = 179107, as_list = FALSE)
    pre_ncs<-merge(pre_list_ncs, pre_text_ncs %>% select(-value, -target) %>% rename(text_num = context), by=c("rank"))
    
    result <-data.frame(words = NA,
                        example = NA)
    
    for(i in lib_dict){
      a<-grep(pattern = i, pre_ncs$context, ignore.case = TRUE)
      if (length(a) > 0) {
        b <- data.frame(words = rep(i, length(a)),
                        example = a)
        result <- rbind(result, b)
      }
    }
    
    result<-result[-1,]
    pre_ncs<-pre_ncs[result$example,]
    pre_ncs<-cbind(pre_ncs, result) %>% select(-target, -rank)
    
    for (i in 1:nrow(pre_ncs)){
      key<-gsub(pattern = "text", replacement = "", pre_ncs$text[i], ignore.case = TRUE)
      key<-as.numeric(key)
      b<-demo_juncture_dem@docvars$full_text[key]
      pre_ncs$real[i]<-b
    }
    
    #pro-juncture contexts
    pro_list_ncs<-ncs(x = demo_juncture_wv["Pro-juncture",], 
                      contexts_dem = demo_juncture_dem[demo_juncture_dem@docvars$junc_hsa == "Pro-juncture",],
                      contexts = demo_juncture_toks, N = 179107, as_list = FALSE)
    pro_text_ncs<-ncs(x = demo_juncture_wv["Pro-juncture",], 
                      contexts_dem = demo_juncture_dem[demo_juncture_dem@docvars$junc_hsa == "Pro-juncture",],
                      contexts = NULL, N = 179107, as_list = FALSE)
    pro_ncs<-merge(pro_list_ncs, pro_text_ncs %>% select(-value, -target) %>% rename(text_num = context), by=c("rank"))
    
    result <-data.frame(words = NA,
                        example = NA)
    
    for(i in lib_dict){
      a<-grep(pattern = i, pro_ncs$context, ignore.case = TRUE)
      if (length(a) > 0) {
        b <- data.frame(words = rep(i, length(a)),
                        example = a)
        result <- rbind(result, b)
      }
    }
    
    result<-result[-1,]
    pro_ncs<-pro_ncs[result$example,]
    pro_ncs<-cbind(pro_ncs, result) %>% select(-target, -rank)
    
    for (i in 1:nrow(pro_ncs)){
      key<-gsub(pattern = "text", replacement = "", pro_ncs$text[i], ignore.case = TRUE)
      key<-as.numeric(key)
      b<-demo_juncture_dem@docvars$full_text[key]
      pro_ncs$real[i]<-b
    }
   
    #combine
    pre_ncs$juncture <-"Pre-juncture"
    pro_ncs$juncture <-"Pro-juncture"

    juncture_ncs <- rbind(pre_ncs, pro_ncs) %>% arrange(words)
#########################
#
# Critical juncture - Black Lives Matter
#
#########################
  ##Death of Travon Martin - 2012-02-26
  juncture <- "junc_travon"
  regression <-data.frame(coef = NA,
                          lower.ci = NA,
                          upper.ci = NA,
                          length   = NA,
                          model    = NA)
  t_test <- data.frame(lower.ci = NA,
                       upper.ci = NA,
                       length   = NA,
                       model    = NA)
  
  for (i in time_intervals){
    start_date = as.Date("2012-02-26")
    start_month = start_date %m-% months(i)
    end_month   = start_date %m+% months(i)
    
    #set docvars
    demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
    
    # Calculate cosine similirarty
    set.seed(2021L)
    juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
    juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
      feature %in% elec_dict ~ "Electoral Democracy",
      feature %in% lib_dict ~  "Liberal Democracy",
      feature %in% parti_dict ~ "Participatory Democracy",
      feature %in% delib_dict ~ "Deliberative Democracy",
      feature %in% egal_dict ~ "Egalitarian Democracy",
      feature %in% autho_dict ~ "Authoritarian Democracy"
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
      demo == "Electoral Democracy" ~ 1,
      demo == "Liberal Democracy" ~ 2,
      demo == "Participatory Democracy" ~ 3,
      demo == "Deliberative Democracy" ~ 4,
      demo == "Egalitarian Democracy" ~ 5,
      demo == "Authoritarian Democracy" ~ 6
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
    
    juncture_dem_cos_juncture %>% 
      ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
      geom_point(size = 1.5)+
      geom_errorbarh(height = 0.1) +
      xlab("Cosine similarity")+
      ylab("Key words")+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
      theme_bw()
    
    ggsave(paste0("fig/juncture/blm/travon_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #run the regression
    juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
    juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
      target <start_date ~ 0,
      TRUE ~ 1
    ))
    
    #visualize
    juncture_dem_mean_month %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence))+
      geom_line(aes(x = target, y = prevalence))+
      geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
      geom_vline(xintercept = start_date, color ="red")+
      xlab("") +
      ylab("") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    ggsave(paste0("fig/juncture/blm/blm_travon_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #regression
    juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
    
    for (k in 3:8) {
      response_var <- names(juncture_dem_mean_month_wide)[k]
      formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
      model <- lm(formula, data = juncture_dem_mean_month_wide)
      
      out <- data.frame(
        coef = coef(model)["juncture"],
        lower.ci = confint(model, 'juncture', level = 0.95)[1],
        upper.ci = confint(model, 'juncture', level = 0.95)[2],
        length = i,
        model = response_var)
      
      regression <- rbind(regression, out)
    }
    #t-test
    if(i==1){
      print("i is 1")
    } else {
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- t.test(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          lower.ci = model$conf.int[1],
          upper.ci = model$conf.int[2],
          length = i,
          model = response_var)
        
        t_test <- rbind(t_test, out)
      }
    }
    docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
  } 
  
  window_reg <- regression[-1,]
  window_reg %>%
    ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  
  ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
  window_t <- t_test[-1,]
  window_t %>%
    ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
  ##Death of Floyd - 2020-05-25
  juncture <- "junc_floyd"
  regression <-data.frame(coef = NA,
                          lower.ci = NA,
                          upper.ci = NA,
                          length   = NA,
                          model    = NA)
  t_test <- data.frame(lower.ci = NA,
                       upper.ci = NA,
                       length   = NA,
                       model    = NA)
  
  for (i in time_intervals){
    start_date = as.Date("2020-05-25")
    start_month = start_date %m-% months(i)
    end_month   = start_date %m+% months(i)
    
    #set docvars
    demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
    
    # Calculate cosine similirarty
    set.seed(2021L)
    juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
    juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
      feature %in% elec_dict ~ "Electoral Democracy",
      feature %in% lib_dict ~  "Liberal Democracy",
      feature %in% parti_dict ~ "Participatory Democracy",
      feature %in% delib_dict ~ "Deliberative Democracy",
      feature %in% egal_dict ~ "Egalitarian Democracy",
      feature %in% autho_dict ~ "Authoritarian Democracy"
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
      demo == "Electoral Democracy" ~ 1,
      demo == "Liberal Democracy" ~ 2,
      demo == "Participatory Democracy" ~ 3,
      demo == "Deliberative Democracy" ~ 4,
      demo == "Egalitarian Democracy" ~ 5,
      demo == "Authoritarian Democracy" ~ 6
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
    
    juncture_dem_cos_juncture %>% 
      ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
      geom_point(size = 1.5)+
      geom_errorbarh(height = 0.1) +
      xlab("Cosine similarity")+
      ylab("Key words")+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
      theme_bw()
    
    ggsave(paste0("fig/juncture/blm/floyd_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #run the regression
    juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
    juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
      target <start_date ~ 0,
      TRUE ~ 1
    ))
    
    #visualize
    juncture_dem_mean_month %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence))+
      geom_line(aes(x = target, y = prevalence))+
      geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
      geom_vline(xintercept = start_date, color ="red")+
      xlab("") +
      ylab("") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    ggsave(paste0("fig/juncture/blm/blm_floyd_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #regression
    juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                              names_from = demo, values_from = prevalence)
    
    for (k in 3:8) {
      response_var <- names(juncture_dem_mean_month_wide)[k]
      formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
      model <- lm(formula, data = juncture_dem_mean_month_wide)
      
      out <- data.frame(
        coef = coef(model)["juncture"],
        lower.ci = confint(model, 'juncture', level = 0.95)[1],
        upper.ci = confint(model, 'juncture', level = 0.95)[2],
        length = i,
        model = response_var)
      
      regression <- rbind(regression, out)
    }
    #t-test
    if(i==1){
      print("i is 1")
    } else {
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- t.test(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          lower.ci = model$conf.int[1],
          upper.ci = model$conf.int[2],
          length = i,
          model = response_var)
        
        t_test <- rbind(t_test, out)
      }
    }
    docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
  } 
  
  window_reg <- regression[-1,]
  window_reg %>%
    ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  
  ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
  window_t <- t_test[-1,]
  window_t %>%
    ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
#########################
#
# Critical juncture - Trump
#
######################### 
  #Trump - announced June 16, 2015/ primary - Feb 1 2016 / elected November 8, 2016
  
  ##Primary 
  juncture <- "junc_trump_primary"
  regression <-data.frame(coef = NA,
                          lower.ci = NA,
                          upper.ci = NA,
                          length   = NA,
                          model    = NA)
  t_test <- data.frame(lower.ci = NA,
                       upper.ci = NA,
                       length   = NA,
                       model    = NA)
  for (i in time_intervals){
    start_date = as.Date("2016-02-01")
    start_month = start_date %m-% months(i)
    end_month   = start_date %m+% months(i)
    
    #set docvars
    demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
    
    # Calculate cosine similirarty
    set.seed(2021L)
    juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
    juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
      feature %in% elec_dict ~ "Electoral Democracy",
      feature %in% lib_dict ~  "Liberal Democracy",
      feature %in% parti_dict ~ "Participatory Democracy",
      feature %in% delib_dict ~ "Deliberative Democracy",
      feature %in% egal_dict ~ "Egalitarian Democracy",
      feature %in% autho_dict ~ "Authoritarian Democracy"
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
      demo == "Electoral Democracy" ~ 1,
      demo == "Liberal Democracy" ~ 2,
      demo == "Participatory Democracy" ~ 3,
      demo == "Deliberative Democracy" ~ 4,
      demo == "Egalitarian Democracy" ~ 5,
      demo == "Authoritarian Democracy" ~ 6
    ))
    
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
    juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
    
    juncture_dem_cos_juncture %>% 
      ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
      geom_point(size = 1.5)+
      geom_errorbarh(height = 0.1) +
      xlab("Cosine similarity")+
      ylab("Key words")+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
      theme_bw()
    
    ggsave(paste0("fig/juncture/trump/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #run the regression
    juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
    juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
      target <start_date ~ 0,
      TRUE ~ 1
    ))
    
    #visualize
    juncture_dem_mean_month %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence))+
      geom_line(aes(x = target, y = prevalence))+
      geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
      geom_vline(xintercept = start_date, color ="red")+
      xlab("") +
      ylab("") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1))+
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    ggsave(paste0("fig/juncture/trump/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
    
    #regression
    juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                              names_from = demo, values_from = prevalence)
    if(i==1){
      print("i is 1")
    } else {
      for (k in 3:8) {
      response_var <- names(juncture_dem_mean_month_wide)[k]
      formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
      model <- lm(formula, data = juncture_dem_mean_month_wide)
      
      out <- data.frame(
        coef = coef(model)["juncture"],
        lower.ci = confint(model, 'juncture', level = 0.95)[1],
        upper.ci = confint(model, 'juncture', level = 0.95)[2],
        length = i,
        model = response_var)
      
      regression <- rbind(regression, out)
      }
    }
    #t-test
    if(i==1){
      print("i is 1")
    } else {
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- t.test(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          lower.ci = model$conf.int[1],
          upper.ci = model$conf.int[2],
          length = i,
          model = response_var)
        
        t_test <- rbind(t_test, out)
      }
    }
    docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
  } 
  
  window_reg <- regression[-1,]
  window_reg %>%
    ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  
  ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
  window_t <- t_test[-1,]
  window_t %>%
    ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
    geom_point() +
    geom_errorbar() +
    geom_hline(yintercept = 0, color = "red") +
    facet_wrap(~model) +
    theme_bw()
  ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
  
    
  ##Announcement
    juncture <- "junc_trump_announce"
    regression <-data.frame(coef = NA,
                            lower.ci = NA,
                            upper.ci = NA,
                            length   = NA,
                            model    = NA)
    t_test <- data.frame(lower.ci = NA,
                         upper.ci = NA,
                         length   = NA,
                         model    = NA)
    for (i in time_intervals){
      start_date = as.Date("2015-06-16")
      start_month = start_date %m-% months(i)
      end_month   = start_date %m+% months(i)
      
      #set docvars
      demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
      
      # Calculate cosine similirarty
      set.seed(2021L)
      juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
      juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
        feature %in% elec_dict ~ "Electoral Democracy",
        feature %in% lib_dict ~  "Liberal Democracy",
        feature %in% parti_dict ~ "Participatory Democracy",
        feature %in% delib_dict ~ "Deliberative Democracy",
        feature %in% egal_dict ~ "Egalitarian Democracy",
        feature %in% autho_dict ~ "Authoritarian Democracy"
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
        demo == "Electoral Democracy" ~ 1,
        demo == "Liberal Democracy" ~ 2,
        demo == "Participatory Democracy" ~ 3,
        demo == "Deliberative Democracy" ~ 4,
        demo == "Egalitarian Democracy" ~ 5,
        demo == "Authoritarian Democracy" ~ 6
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
      
      juncture_dem_cos_juncture %>% 
        ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
        geom_point(size = 1.5)+
        geom_errorbarh(height = 0.1) +
        xlab("Cosine similarity")+
        ylab("Key words")+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
        theme_bw()
      
      ggsave(paste0("fig/juncture/trump/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #run the regression
      juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
      juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
        target <start_date ~ 0,
        TRUE ~ 1
      ))
      
      #visualize
      juncture_dem_mean_month %>%
        ggplot() +
        geom_point(aes(x = target, y = prevalence))+
        geom_line(aes(x = target, y = prevalence))+
        geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
        geom_vline(xintercept = start_date, color ="red")+
        xlab("") +
        ylab("") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
      ggsave(paste0("fig/juncture/trump/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #regression
      juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
      
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- lm(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            coef = coef(model)["juncture"],
            lower.ci = confint(model, 'juncture', level = 0.95)[1],
            upper.ci = confint(model, 'juncture', level = 0.95)[2],
            length = i,
            model = response_var)
          
          regression <- rbind(regression, out)
        }
      }
      #t-test
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- t.test(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            lower.ci = model$conf.int[1],
            upper.ci = model$conf.int[2],
            length = i,
            model = response_var)
          
          t_test <- rbind(t_test, out)
        }
      }
      docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
    } 
    
    window_reg <- regression[-1,]
    window_reg %>%
      ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    
    ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    window_t <- t_test[-1,]
    window_t %>%
      ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
  ## Elected
    juncture <- "junc_trump_election"
    regression <-data.frame(coef = NA,
                            lower.ci = NA,
                            upper.ci = NA,
                            length   = NA,
                            model    = NA)
    t_test <- data.frame(lower.ci = NA,
                         upper.ci = NA,
                         length   = NA,
                         model    = NA)
    for (i in time_intervals){
      start_date = as.Date("2016-11-08")
      start_month = start_date %m-% months(i)
      end_month   = start_date %m+% months(i)
      
      #set docvars
      demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
      
      # Calculate cosine similirarty
      set.seed(2021L)
      juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
      juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
        feature %in% elec_dict ~ "Electoral Democracy",
        feature %in% lib_dict ~  "Liberal Democracy",
        feature %in% parti_dict ~ "Participatory Democracy",
        feature %in% delib_dict ~ "Deliberative Democracy",
        feature %in% egal_dict ~ "Egalitarian Democracy",
        feature %in% autho_dict ~ "Authoritarian Democracy"
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
        demo == "Electoral Democracy" ~ 1,
        demo == "Liberal Democracy" ~ 2,
        demo == "Participatory Democracy" ~ 3,
        demo == "Deliberative Democracy" ~ 4,
        demo == "Egalitarian Democracy" ~ 5,
        demo == "Authoritarian Democracy" ~ 6
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
      
      juncture_dem_cos_juncture %>% 
        ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
        geom_point(size = 1.5)+
        geom_errorbarh(height = 0.1) +
        xlab("Cosine similarity")+
        ylab("Key words")+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
        theme_bw()
      
      ggsave(paste0("fig/juncture/trump/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #run the regression
      juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
      juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
        target <start_date ~ 0,
        TRUE ~ 1
      ))
      
      #visualize
      juncture_dem_mean_month %>%
        ggplot() +
        geom_point(aes(x = target, y = prevalence))+
        geom_line(aes(x = target, y = prevalence))+
        geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
        geom_vline(xintercept = start_date, color ="red")+
        xlab("") +
        ylab("") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
      ggsave(paste0("fig/juncture/trump/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #regression
      juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
      
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- lm(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            coef = coef(model)["juncture"],
            lower.ci = confint(model, 'juncture', level = 0.95)[1],
            upper.ci = confint(model, 'juncture', level = 0.95)[2],
            length = i,
            model = response_var)
          
          regression <- rbind(regression, out)
        }
      }
      #t-test
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- t.test(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            lower.ci = model$conf.int[1],
            upper.ci = model$conf.int[2],
            length = i,
            model = response_var)
          
          t_test <- rbind(t_test, out)
        }
      }
      docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
    } 
    
    window_reg <- regression[-1,]
    window_reg %>%
      ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    
    ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    window_t <- t_test[-1,]
    window_t %>%
      ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
#########################
#
# Critical juncture -  Occupy Wall Street
#
#########################
  
    ## 2008-01-01
    juncture <- "junc_ows_first"
    regression <-data.frame(coef = NA,
                            lower.ci = NA,
                            upper.ci = NA,
                            length   = NA,
                            model    = NA)
    t_test <- data.frame(lower.ci = NA,
                         upper.ci = NA,
                         length   = NA,
                         model    = NA)
    for (i in time_intervals){
      start_date = as.Date("2008-01-01")
      start_month = start_date %m-% months(i)
      end_month   = start_date %m+% months(i)
      
      #set docvars
      demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
      
      # Calculate cosine similirarty
      set.seed(2021L)
      juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
      juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
        feature %in% elec_dict ~ "Electoral Democracy",
        feature %in% lib_dict ~  "Liberal Democracy",
        feature %in% parti_dict ~ "Participatory Democracy",
        feature %in% delib_dict ~ "Deliberative Democracy",
        feature %in% egal_dict ~ "Egalitarian Democracy",
        feature %in% autho_dict ~ "Authoritarian Democracy"
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
        demo == "Electoral Democracy" ~ 1,
        demo == "Liberal Democracy" ~ 2,
        demo == "Participatory Democracy" ~ 3,
        demo == "Deliberative Democracy" ~ 4,
        demo == "Egalitarian Democracy" ~ 5,
        demo == "Authoritarian Democracy" ~ 6
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
      
      juncture_dem_cos_juncture %>% 
        ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
        geom_point(size = 1.5)+
        geom_errorbarh(height = 0.1) +
        xlab("Cosine similarity")+
        ylab("Key words")+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
        theme_bw()
      
      ggsave(paste0("fig/juncture/ows/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #run the regression
      juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
      juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
        target <start_date ~ 0,
        TRUE ~ 1
      ))
      
      #visualize
      juncture_dem_mean_month %>%
        ggplot() +
        geom_point(aes(x = target, y = prevalence))+
        geom_line(aes(x = target, y = prevalence))+
        geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
        geom_vline(xintercept = start_date, color ="red")+
        xlab("") +
        ylab("") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
      ggsave(paste0("fig/juncture/ows/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #regression
      juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
      
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- lm(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          coef = coef(model)["juncture"],
          lower.ci = confint(model, 'juncture', level = 0.95)[1],
          upper.ci = confint(model, 'juncture', level = 0.95)[2],
          length = i,
          model = response_var)
        
        regression <- rbind(regression, out)
      }
      #t-test
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- t.test(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            lower.ci = model$conf.int[1],
            upper.ci = model$conf.int[2],
            length = i,
            model = response_var)
          
          t_test <- rbind(t_test, out)
        }
      }
      docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
    } 
    
    window_reg <- regression[-1,]
    window_reg %>%
      ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    
    ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    window_t <- t_test[-1,]
    window_t %>%
      ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    
    ##Announcement
    juncture <- "junc_ows_second"
    regression <-data.frame(coef = NA,
                            lower.ci = NA,
                            upper.ci = NA,
                            length   = NA,
                            model    = NA)
    t_test <- data.frame(lower.ci = NA,
                         upper.ci = NA,
                         length   = NA,
                         model    = NA)
    for (i in time_intervals){
      start_date = as.Date("2009-06-01")
      start_month = start_date %m-% months(i)
      end_month   = start_date %m+% months(i)
      
      #set docvars
      demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
      
      # Calculate cosine similirarty
      set.seed(2021L)
      juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
      juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
        feature %in% elec_dict ~ "Electoral Democracy",
        feature %in% lib_dict ~  "Liberal Democracy",
        feature %in% parti_dict ~ "Participatory Democracy",
        feature %in% delib_dict ~ "Deliberative Democracy",
        feature %in% egal_dict ~ "Egalitarian Democracy",
        feature %in% autho_dict ~ "Authoritarian Democracy"
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
        demo == "Electoral Democracy" ~ 1,
        demo == "Liberal Democracy" ~ 2,
        demo == "Participatory Democracy" ~ 3,
        demo == "Deliberative Democracy" ~ 4,
        demo == "Egalitarian Democracy" ~ 5,
        demo == "Authoritarian Democracy" ~ 6
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
      
      juncture_dem_cos_juncture %>% 
        ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
        geom_point(size = 1.5)+
        geom_errorbarh(height = 0.1) +
        xlab("Cosine similarity")+
        ylab("Key words")+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
        theme_bw()
      
      ggsave(paste0("fig/juncture/ows/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #run the regression
      juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
      juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
        target <start_date ~ 0,
        TRUE ~ 1
      ))
      
      #visualize
      juncture_dem_mean_month %>%
        ggplot() +
        geom_point(aes(x = target, y = prevalence))+
        geom_line(aes(x = target, y = prevalence))+
        geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
        geom_vline(xintercept = start_date, color ="red")+
        xlab("") +
        ylab("") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
      ggsave(paste0("fig/juncture/ows/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #regression
      juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
      
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- lm(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          coef = coef(model)["juncture"],
          lower.ci = confint(model, 'juncture', level = 0.95)[1],
          upper.ci = confint(model, 'juncture', level = 0.95)[2],
          length = i,
          model = response_var)
        
        regression <- rbind(regression, out)
      }
      #t-test
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- t.test(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            lower.ci = model$conf.int[1],
            upper.ci = model$conf.int[2],
            length = i,
            model = response_var)
          
          t_test <- rbind(t_test, out)
        }
      }
      docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
    } 
    
    window_reg <- regression[-1,]
    window_reg %>%
      ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    
    ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    window_t <- t_test[-1,]
    window_t %>%
      ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    ## Elected
    juncture <- "junc_ows_third"
    regression <-data.frame(coef = NA,
                            lower.ci = NA,
                            upper.ci = NA,
                            length   = NA,
                            model    = NA)
    t_test <- data.frame(lower.ci = NA,
                         upper.ci = NA,
                         length   = NA,
                         model    = NA)
    for (i in time_intervals){
      start_date = as.Date("2011-09-17")
      start_month = start_date %m-% months(i)
      end_month   = start_date %m+% months(i)
      
      #set docvars
      demo_toks  <- set_juncture(demo_toks,  juncture, start_date, start_month, end_month)
      
      # Calculate cosine similirarty
      set.seed(2021L)
      juncture_dem_cos_juncture  <- cal_cos(demo_toks, juncture, all_dict)
      juncture_dem_cos_juncture <- juncture_dem_cos_juncture %>% filter(target!="out_window") %>% mutate(demo=case_when(
        feature %in% elec_dict ~ "Electoral Democracy",
        feature %in% lib_dict ~  "Liberal Democracy",
        feature %in% parti_dict ~ "Participatory Democracy",
        feature %in% delib_dict ~ "Deliberative Democracy",
        feature %in% egal_dict ~ "Egalitarian Democracy",
        feature %in% autho_dict ~ "Authoritarian Democracy"
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% mutate(model_num = case_when(
        demo == "Electoral Democracy" ~ 1,
        demo == "Liberal Democracy" ~ 2,
        demo == "Participatory Democracy" ~ 3,
        demo == "Deliberative Democracy" ~ 4,
        demo == "Egalitarian Democracy" ~ 5,
        demo == "Authoritarian Democracy" ~ 6
      ))
      
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(feature)
      juncture_dem_cos_juncture<-juncture_dem_cos_juncture %>% arrange(model_num)
      
      juncture_dem_cos_juncture %>% 
        ggplot(aes(x=value, y=feature, color=target, group=target, xmin = lower.ci, xmax=upper.ci))+
        geom_point(size = 1.5)+
        geom_errorbarh(height = 0.1) +
        xlab("Cosine similarity")+
        ylab("Key words")+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)+
        theme_bw()
      
      ggsave(paste0("fig/juncture/ows/", juncture, "_keyword_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #run the regression
      juncture_dem_mean_month <- result_dem_mean_month %>% filter(target>= start_month & target<=end_month)
      juncture_dem_mean_month <- juncture_dem_mean_month %>% mutate(juncture = case_when(
        target <start_date ~ 0,
        TRUE ~ 1
      ))
      
      #visualize
      juncture_dem_mean_month %>%
        ggplot() +
        geom_point(aes(x = target, y = prevalence))+
        geom_line(aes(x = target, y = prevalence))+
        geom_smooth(aes(x = target, y = prevalence), method = "auto") + #"auto" will give more smooth one
        geom_vline(xintercept = start_date, color ="red")+
        xlab("") +
        ylab("") +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
      ggsave(paste0("fig/juncture/ows/", juncture, "_mean_", i, "month.jpeg"), width = 12, height = 8, dpi = 1000)
      
      #regression
      juncture_dem_mean_month_wide<-pivot_wider(juncture_dem_mean_month %>% select(!model_num),
                                                names_from = demo, values_from = prevalence)
      
      for (k in 3:8) {
        response_var <- names(juncture_dem_mean_month_wide)[k]
        formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
        model <- lm(formula, data = juncture_dem_mean_month_wide)
        
        out <- data.frame(
          coef = coef(model)["juncture"],
          lower.ci = confint(model, 'juncture', level = 0.95)[1],
          upper.ci = confint(model, 'juncture', level = 0.95)[2],
          length = i,
          model = response_var)
        
        regression <- rbind(regression, out)
      }
      #t-test
      if(i==1){
        print("i is 1")
      } else {
        for (k in 3:8) {
          response_var <- names(juncture_dem_mean_month_wide)[k]
          formula <- as.formula(paste("`", response_var, "` ~ juncture", sep = ""))
          model <- t.test(formula, data = juncture_dem_mean_month_wide)
          
          out <- data.frame(
            lower.ci = model$conf.int[1],
            upper.ci = model$conf.int[2],
            length = i,
            model = response_var)
          
          t_test <- rbind(t_test, out)
        }
      }
      docvars(demo_toks)<-docvars(demo_toks) %>% select(!juncture)
    } 
    
    window_reg <- regression[-1,]
    window_reg %>%
      ggplot(aes(x = factor(length), y = coef, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    
    ggsave(paste0("fig/juncture/", juncture, "_regre_coef.jpeg"), width = 12, height = 8, dpi = 1000)
    
    window_t <- t_test[-1,]
    window_t %>%
      ggplot(aes(x = factor(length), y = (upper.ci+lower.ci)/2, ymax = upper.ci, ymin = lower.ci)) +
      geom_point() +
      geom_errorbar() +
      geom_hline(yintercept = 0, color = "red") +
      facet_wrap(~model) +
      theme_bw()
    ggsave(paste0("fig/juncture/", juncture, "_ttest_coef.jpeg"), width = 12, height = 8, dpi = 1000)